with whiskynotes.be data

Author

Tony Duan

Code
library(tidyverse)
library(rvest)
Code
# Display the installed rvest version for reproducibility.
print(packageVersion("rvest"))

1 Loop over all year pages

Code
# Archive years to scrape: 2010 through 2024 inclusive.
year_list <- 2010:2024
print(year_list)
Code
# One archive URL per year: the site root followed by the year.
site_root <- "https://www.whiskynotes.be/"
url_list <- paste0(site_root, year_list)
print(url_list)
Code
# Scrape each yearly archive page in `url_list` and build five parallel
# vectors that the rest of the script consumes:
#   bottle_list          - every line of text found in the page's <p> elements
#   all_year_list_bottle - the year (as a string) for each entry of bottle_list
#   topic_list           - text of every `.archive-link` element
#   topic_link_list      - href of every `.entry-permalink` element
#   all_year_list_topic  - the year (as a string) for each entry of topic_list
#
# Per-page results go into preallocated lists and are flattened once after the
# loop, instead of growing five vectors with c() on every iteration.
page_count <- length(url_list)
page_years <- character(page_count)
bottle_pages <- vector("list", page_count)
topic_pages <- vector("list", page_count)
topic_link_pages <- vector("list", page_count)

for (page_idx in seq_len(page_count)) {
  year_url <- url_list[[page_idx]]
  # The year is the last "/"-separated component of the URL.
  year <- tail(unlist(strsplit(year_url, split = "/")), 1)
  print(year)
  print(year_url)

  year_page <- read_html(year_url)

  # A single <p> may render as several lines; split on newlines so each line
  # becomes its own entry.
  paragraph_text <- year_page %>%
    html_elements("p") %>%
    html_text2()
  page_bottles <- unlist(strsplit(paragraph_text, "\n"))

  page_topics <- year_page %>%
    html_elements(".archive-link") %>%
    html_text2()
  page_topic_links <- year_page %>%
    html_elements(css = ".entry-permalink") %>%
    html_attr("href")

  # Topics and links are paired row by row when the topic table is built, so
  # a per-page count mismatch would misalign (or break) that table. Fail here
  # with a message that names the offending page.
  if (length(page_topics) != length(page_topic_links)) {
    stop(
      "Page ", year_url, " has ", length(page_topics),
      " '.archive-link' elements but ", length(page_topic_links),
      " '.entry-permalink' elements; topics and links cannot be paired.",
      call. = FALSE
    )
  }

  page_years[page_idx] <- year
  # Assign via `[<-` with a wrapped list so a NULL result (page without any
  # <p> text) keeps its slot instead of deleting the list element.
  bottle_pages[page_idx] <- list(page_bottles)
  topic_pages[page_idx] <- list(page_topics)
  topic_link_pages[page_idx] <- list(page_topic_links)

  # Pause between requests (presumably to avoid hammering the site).
  Sys.sleep(1)
}

bottle_list <- unlist(bottle_pages)
topic_list <- unlist(topic_pages)
topic_link_list <- unlist(topic_link_pages)

# Repeat each page's year once per item scraped from that page.
all_year_list_bottle <- rep(page_years, times = lengths(bottle_pages))
all_year_list_topic <- rep(page_years, times = lengths(topic_pages))

2 combine

Code
# Topic table: `.archive-link` text, `.entry-permalink` href, and source year.
data <- tibble(
  topic_list = topic_list,
  topic_link_list = topic_link_list,
  all_year_list_topic = all_year_list_topic
)
Code
# Bottle table: one row per scraped paragraph line, with its source year.
bottle003 <- tibble(
  bottle_list = bottle_list,
  all_year_list_bottle = all_year_list_bottle
)

3 output

Code
library(openxlsx)
# Write both tables to a single workbook; the list names ("topic", "bottle")
# become the worksheet names.
list_of_datasets <- list("topic" = data, "bottle" = bottle003)

output_path <- "./output/all year page.xlsx"
# Create the output directory if it is missing so the write does not fail on
# a fresh checkout; this is a no-op when the directory already exists.
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
write.xlsx(list_of_datasets, file = output_path)

4 reference:

Back to top